In this analysis we will look at how life expectancy has changed over time and how it relates to a country’s gross domestic product (GDP). GDP is the standard measure of the value added through the production of goods and services in a country during a certain period. As such, it also measures the income earned from that production, or the total amount spent on final goods and services (less imports)[1].
(If you have time, try creating new plots from other gapminder data!)
# Plotting package used by every ggplot() call in this lesson.
library("ggplot2")
# Lesson-rendering setup: run the shared chunk-options script, then set the
# figure path prefix for this episode.
# NOTE(review): knitr_fig_path() is presumably defined by chunk-options.R - confirm.
source("../bin/chunk-options.R")
knitr_fig_path("01-")
# Silently load in the data so the rest of the lesson works
gapminder <- read.csv("../data/gapminder_data.csv", header = TRUE)
{: .language-r}
# Preview the first rows of the data frame (six rows by default).
head(gapminder)
{: .language-r}
<div data-pagedtable="false">
<script data-pagedtable-source type="application/json">
{"columns":[{"label":[""],"name":["_rn_"],"type":[""],"align":["left"]},{"label":["country"],"name":[1],"type":["chr"],"align":["left"]},{"label":["year"],"name":[2],"type":["int"],"align":["right"]},{"label":["pop"],"name":[3],"type":["dbl"],"align":["right"]},{"label":["continent"],"name":[4],"type":["chr"],"align":["left"]},{"label":["lifeExp"],"name":[5],"type":["dbl"],"align":["right"]},{"label":["gdpPercap"],"name":[6],"type":["dbl"],"align":["right"]}],"data":[{"1":"Afghanistan","2":"1952","3":"8425333","4":"Asia","5":"28.801","6":"779.4453","_rn_":"1"},{"1":"Afghanistan","2":"1957","3":"9240934","4":"Asia","5":"30.332","6":"820.8530","_rn_":"2"},{"1":"Afghanistan","2":"1962","3":"10267083","4":"Asia","5":"31.997","6":"853.1007","_rn_":"3"},{"1":"Afghanistan","2":"1967","3":"11537966","4":"Asia","5":"34.020","6":"836.1971","_rn_":"4"},{"1":"Afghanistan","2":"1972","3":"13079460","4":"Asia","5":"36.088","6":"739.9811","_rn_":"5"},{"1":"Afghanistan","2":"1977","3":"14880372","4":"Asia","5":"38.438","6":"786.1134","_rn_":"6"}],"options":{"columns":{"min":{},"max":[10]},"rows":{"min":[10],"max":[10]},"pages":{}}}
</script>
</div>
{: .output}
# Preview the last rows of the data frame (six rows by default).
tail(gapminder)
{: .language-r}
<div data-pagedtable="false">
<script data-pagedtable-source type="application/json">
{"columns":[{"label":[""],"name":["_rn_"],"type":[""],"align":["left"]},{"label":["country"],"name":[1],"type":["chr"],"align":["left"]},{"label":["year"],"name":[2],"type":["int"],"align":["right"]},{"label":["pop"],"name":[3],"type":["dbl"],"align":["right"]},{"label":["continent"],"name":[4],"type":["chr"],"align":["left"]},{"label":["lifeExp"],"name":[5],"type":["dbl"],"align":["right"]},{"label":["gdpPercap"],"name":[6],"type":["dbl"],"align":["right"]}],"data":[{"1":"Zimbabwe","2":"1982","3":"7636524","4":"Africa","5":"60.363","6":"788.8550","_rn_":"1699"},{"1":"Zimbabwe","2":"1987","3":"9216418","4":"Africa","5":"62.351","6":"706.1573","_rn_":"1700"},{"1":"Zimbabwe","2":"1992","3":"10704340","4":"Africa","5":"60.377","6":"693.4208","_rn_":"1701"},{"1":"Zimbabwe","2":"1997","3":"11404948","4":"Africa","5":"46.809","6":"792.4500","_rn_":"1702"},{"1":"Zimbabwe","2":"2002","3":"11926563","4":"Africa","5":"39.989","6":"672.0386","_rn_":"1703"},{"1":"Zimbabwe","2":"2007","3":"12311143","4":"Africa","5":"43.487","6":"469.7093","_rn_":"1704"}],"options":{"columns":{"min":{},"max":[10]},"rows":{"min":[10],"max":[10]},"pages":{}}}
</script>
</div>
{: .output}
# List the column names.
names(gapminder)
{: .language-r}
[1] "country" "year" "pop" "continent" "lifeExp" "gdpPercap"
{: .output}
# Number of columns (variables).
ncol(gapminder)
{: .language-r}
[1] 6
{: .output}
# For a data frame, length() counts columns, so it matches ncol().
length(gapminder)
{: .language-r}
[1] 6
{: .output}
# Dimensions as c(rows, columns).
dim(gapminder)
{: .language-r}
[1] 1704 6
{: .output}
# Number of rows (observations).
nrow(gapminder)
{: .language-r}
[1] 1704
{: .output}
A statistical overview can be obtained with
summary():
# Per-column summary: quartiles/mean for numeric columns, length/class for
# character columns.
summary(gapminder)
{: .language-r}
country year pop continent
Length:1704 Min. :1952 Min. :6.001e+04 Length:1704
Class :character 1st Qu.:1966 1st Qu.:2.794e+06 Class :character
Mode :character Median :1980 Median :7.024e+06 Mode :character
Mean :1980 Mean :2.960e+07
3rd Qu.:1993 3rd Qu.:1.959e+07
Max. :2007 Max. :1.319e+09
lifeExp gdpPercap
Min. :23.60 Min. : 241.2
1st Qu.:48.20 1st Qu.: 1202.1
Median :60.71 Median : 3531.8
Mean :59.47 Mean : 7215.3
3rd Qu.:70.85 3rd Qu.: 9325.5
Max. :82.60 Max. :113523.1
{: .output}
We can plot the life expectancy:
# Base-graphics scatter plot of life expectancy against year (formula interface).
plot(lifeExp ~ year, data = gapminder)
{: .language-r}
# Scatter plot: GDP per capita on x, life expectancy on y, one point per row.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
{: .language-r}
# Life expectancy over time, one line per country, coloured by continent.
# Fix: the aesthetic was misspelled `olor`, so ggplot2 never applied the
# colour mapping and every line was drawn in the default colour.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, by = country, color = continent)
) +
  geom_line()
{: .language-r}
# Lines plus points; both layers inherit the plot-level colour mapping, so
# lines and points are coloured by continent.
ggplot(
  gapminder,
  aes(x = year, y = lifeExp, by = country, color = continent)
) +
  geom_line() +
  geom_point()
{: .language-r}
# Colour is mapped inside geom_line() only, so the lines are coloured by
# continent while the points keep the default colour.
ggplot(gapminder, aes(x = year, y = lifeExp, by = country)) +
  geom_line(aes(color = continent)) +
  geom_point()
{: .language-r}
# Baseline scatter plot of life expectancy against GDP per capita (linear x-axis).
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
{: .language-r}
# Same scatter plot with half-transparent points and a log10-scaled x-axis.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10()
{: .language-r}
Scatterplot of GDP vs life expectancy showing logarithmic x-axis data spread
# Log-x scatter plot with a fitted linear-model ("lm") smoother on top.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = 0.5) +
  scale_x_log10() +
  geom_smooth(method = "lm")
{: .language-r}
`geom_smooth()` using formula = 'y ~ x'
{: .output}
# Same plot with a thicker fitted line.
# NOTE: `size` for lines was deprecated in ggplot2 3.4.0, which is what triggers
# the warning shown below; `linewidth = 1.5` is the replacement argument.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
geom_point(alpha = 0.5) + scale_x_log10() + geom_smooth(method="lm", size=1.5)
{: .language-r}
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
{: .warning}
`geom_smooth()` using formula = 'y ~ x'
{: .output}
# Keep only the rows for the Americas, then draw one life-expectancy panel
# per country with the x-axis tick labels rotated by 45 degrees.
americas <- gapminder[gapminder$continent == "Americas", ]
ggplot(americas, aes(x = year, y = lifeExp)) +
  geom_line() +
  facet_wrap(~country) +
  theme(axis.text.x = element_text(angle = 45))
{: .language-r}
# Faceted life-expectancy plot for the Americas with custom axis, title and
# legend labels; x-axis tick labels are rotated to vertical.
ggplot(americas, aes(x = year, y = lifeExp, color = continent)) +
  geom_line() +
  facet_wrap(~country) +
  labs(
    title = "Figure 1",      # main title of the figure
    x = "Year",              # x-axis title
    y = "Life expectancy",   # y-axis title
    color = "Continent"      # legend title
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
{: .language-r}
# Build the labelled, faceted plot, keep it in `lifeExp_plot`, and write it
# to a PNG file (12 cm x 10 cm at 300 dpi).
# NOTE(review): assumes a `results/` directory exists in the working
# directory - confirm.
lifeExp_plot <- ggplot(
  americas,
  aes(x = year, y = lifeExp, color = continent)
) +
  geom_line() +
  facet_wrap(~country) +
  labs(
    title = "Figure 1",      # main title of the figure
    x = "Year",              # x-axis title
    y = "Life expectancy",   # y-axis title
    color = "Continent"      # legend title
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggsave(
  filename = "results/lifeExp.png",
  plot = lifeExp_plot,
  width = 12,
  height = 10,
  units = "cm",
  dpi = 300
)
{: .language-r}
[1] https://data.oecd.org/gdp/gross-domestic-product-gdp.htm
Sources: